We showcase scMC’s capability of detecting context-shared and -specific biological signals by applying it to two mouse skin scRNA-seq datasets containing cells from control and Hedgehog (Hh) activation conditions during skin wound healing.
library(scMC)
library(Seurat)
library(patchwork)
library(dplyr)
library(ggplot2)
scMC takes a list of multiple digital data matrices as input. Features should be in rows and cells in columns. Rownames and colnames should be included.
The scRNA datasets we demonstrated here, including two count data matrices for the control and Hh activation conditions, can be downloaded via this shared Google Drive link.
load("/Users/suoqinjin/Documents/scMC_SeuratWrapper/tutorial/data_dermis.rda")
data.input <- data_dermis # a list of count data matrix, one per dataset
sample.name <- names(data.input)
object.list <- vector("list", length(sample.name))
names(object.list) <- sample.name
for (i in 1:length(object.list)) {
# Initialize the Seurat object with the raw (non-normalized data) for each dataset
x <- CreateSeuratObject(counts = data.input[[i]], min.cells = 3, min.features = 200, project = sample.name[i])
# calculate mitochondrial QC metrics
x[["percent.mt"]] <- PercentageFeatureSet(x, pattern = "^mt-")
x$sample.name <- sample.name[i]
x <- RenameCells(x, new.names = paste0(Cells(x), "_", x$sample.name))
object.list[[i]] <- x
rm(x)
}
lapply(object.list, function(x) dim(x@assays$RNA@data))
#> $Control
#> [1] 17099 2878
#>
#> $`Hh activation`
#> [1] 16119 1802
nFeature_RNA1 = c(7000, 7000); nCount_RNA1 = c(40000, 40000); percent.mt1 = c(10, 10)
for (i in 1:length(object.list)) {
# Visualize QC metrics as a violin plot
gg <- VlnPlot(object.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.0001,cols=c("#a6cee3")) + geom_hline(yintercept = percent.mt1[i], linetype = 2)
print(gg)
cowplot::save_plot(filename=paste0("QC1_", sample.name[i],"_.pdf"), plot=gg, base_width = 7, base_height = 4.5)
# VlnPlot(object.list[[i]], features = c("percent.mt"))+ geom_hline(yintercept = 15, linetype = 2)
Sys.sleep(2)
plot1 <- FeatureScatter(object.list[[i]], feature1 = "nCount_RNA", feature2 = "percent.mt", pt.size = 0.1,cols=c("black")) + geom_hline(yintercept = percent.mt1[i], linetype = 2)
plot2 <- FeatureScatter(object.list[[i]], feature1 = "nCount_RNA", feature2 = "nFeature_RNA", pt.size = 0.1,cols=c("black")) + geom_hline(yintercept = nFeature_RNA1[i], linetype = 2)
gg <- wrap_plots(plots = plot1, plot2, ncol = 2)
print(gg)
cowplot::save_plot(filename=paste0("QC2_", sample.name[i],"_.pdf"), plot=gg, base_width = 10, base_height = 3.5)
object.list[[i]] <- subset(object.list[[i]], subset = (nFeature_RNA < nFeature_RNA1[i]) & (nCount_RNA < nCount_RNA1[i]) & (percent.mt < percent.mt1[i]))
}
lapply(object.list, function(x) dim(x@assays$RNA@data))
#> $Control
#> [1] 17099 2847
#>
#> $`Hh activation`
#> [1] 16119 1792
object.list <- lapply(X = object.list, FUN = function(x) {
x <- NormalizeData(x, verbose = FALSE)
x <- FindVariableFeatures(x, verbose = FALSE, nfeatures = 3000)
#x <- FindVariableFeatures(x, selection.method = "mean.var.plot", verbose = FALSE, mean.cutoff = c(0.01, 5), dispersion.cutoff = c(0.25, Inf))
# perform scaling on the previously identified variable features
x <- ScaleData(x, verbose = FALSE)
})
future::plan("multiprocess", workers = 4)
# compute SNN
object.list <- identifyNeighbors(object.list)
#> Performing PCA in Dataset 1
#> Building SNN on the PCA space in Dataset 1
#> Performing PCA in Dataset 2
#> Building SNN on the PCA space in Dataset 2
# identify clusters
object.list <- identifyClusters(object.list)
#> Identifying cell clusters in Dataset 1
#> Peforming hierarchical clustering on the computed consensus matrix
#> The initial guessed number of clusters is 17
#> Identifying cell clusters in Dataset 2
#> Peforming hierarchical clustering on the computed consensus matrix
#> The initial guessed number of clusters is 15
features.integration = identifyIntegrationFeatures(object.list)
object.list <- identifyConfidentCells(object.list, features.integration)
object.list <- identifyMarkers(object.list)
#> Identifying marker genes of cell clusters in Dataset 1
#> Calculating cluster 1
#> Calculating cluster 2
#> Calculating cluster 3
#> Calculating cluster 4
#> Calculating cluster 7
#> Calculating cluster 8
#> Calculating cluster 9
#> Calculating cluster 10
#> Calculating cluster 11
#> Calculating cluster 12
#> Identifying marker genes of cell clusters in Dataset 2
#> Calculating cluster 1
#> Calculating cluster 2
#> Calculating cluster 3
#> Calculating cluster 4
#> Calculating cluster 5
#> Calculating cluster 6
#> Calculating cluster 7
#> Calculating cluster 8
#> Calculating cluster 10
#> Calculating cluster 12
structured_mat <- learnTechnicalVariation(object.list, features.integration)
nPC = 40
combined <- FindNeighbors(combined, reduction = "scMC", dims = 1:nPC)
combined <- FindClusters(combined, algorithm = 4, resolution = 0.05)
levels(Idents(combined))
#> [1] "1" "2" "3" "4" "5" "6"
combined <- BuildClusterTree(combined, reorder = T, reorder.numeric = T, verbose = F)
combined <- RunUMAP(combined, reduction='scMC', dims = 1:nPC)
dimPlot(combined, reduction = "umap", group.by = c("sample.name","ident"))
### Quick exploration of the marker genes
combined <- ScaleData(combined, feature = rownames(combined), verbose = FALSE)
markers <- FindAllMarkers(combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
#> Calculating cluster 1
#> Calculating cluster 2
#> Calculating cluster 3
#> Calculating cluster 4
#> Calculating cluster 5
#> Calculating cluster 6
Misc(combined, slot = 'markers') <- markers
top10 <- markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
doHeatmap(combined, features=top10$gene, group.by='ident', additional.group.by = c('sample.name'))
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
features = c('Lox','Ptch1','Cd68','Pecam1','Myh11','Plp1')
gg <- featurePlot(combined, features = features, show.legend = F, show.axes = F)
#> Scale for 'colour' is already present. Adding another scale for 'colour',
#> which will replace the existing scale.
#> Scale for 'colour' is already present. Adding another scale for 'colour',
#> which will replace the existing scale.
#> Scale for 'colour' is already present. Adding another scale for 'colour',
#> which will replace the existing scale.
#> Scale for 'colour' is already present. Adding another scale for 'colour',
#> which will replace the existing scale.
#> Scale for 'colour' is already present. Adding another scale for 'colour',
#> which will replace the existing scale.
#> Scale for 'colour' is already present. Adding another scale for 'colour',
#> which will replace the existing scale.
gg <- patchwork::wrap_plots(plots = gg, ncol = 3)
gg
cowplot::save_plot(filename=paste0("overlayKnownMarkers_integration_dermis", "_umap.pdf"), plot=gg, base_width = 5.5, base_height = 6)
new.cluster.ids <- c("Immune", "Hh-inactive Fib", "Hh-active Fib", "Schwann", "Endotheial","Muscle")
names(new.cluster.ids) <- levels(combined)
combined <- RenameIdents(combined, new.cluster.ids)
new.order <- c("Hh-inactive Fib", "Hh-active Fib","Immune","Endotheial", "Muscle", "Schwann")
combined@active.ident <- factor(combined@active.ident, levels = new.order)
p1 <- dimPlot(combined, reduction = "umap", group.by = "sample.name", colors.ggplot = T)
p2 <- dimPlot(combined, reduction = "umap", label = F)
gg <- patchwork::wrap_plots(p1, p2, ncol = 2)
gg
cowplot::save_plot(filename=paste0("integration_dermis", "_umap.pdf"), plot=gg, base_width = 8, base_height = 3)
# Split the plot into each dataset
dimPlot(combined, reduction = "umap", split.by = "sample.name", combine = T)
markers <- FindAllMarkers(combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
#> Calculating cluster Hh-inactive Fib
#> Calculating cluster Hh-active Fib
#> Calculating cluster Immune
#> Calculating cluster Endotheial
#> Calculating cluster Muscle
#> Calculating cluster Schwann
Misc(combined, slot = 'markers') <- markers
top10 <- markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
doHeatmap(combined, features=top10$gene, group.by='ident', additional.group.by = c('sample.name'))
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
features = c('Lox','Ptch1','Cd68','Pecam1','Myh11','Plp1')
gg <- StackedVlnPlot(combined, features = features)
gg
cowplot::save_plot(paste0("violin_markers", "_integration_dermis", ".pdf"), gg, base_height = 3.5, base_width = 2)
features = c('Lox','Ptch1','Cd68','Pecam1','Myh11','Plp1')
gg <- StackedVlnPlot(combined, features = features, split.by = "sample.name")
gg
features = c('Pdgfra','Lox','Ptch1','Gli1')
gg <- vlnPlot(combined, features = features, stat.add = T, comparisons = list(c("Hh-inactive Fib", "Hh-active Fib")))
patchwork::wrap_plots(plots = gg, ncol = 4)
dotPlot(combined, features =c('Lox','Ptch1','Cd68','Pecam1','Myh11','Plp1'))
#> Scale for 'colour' is already present. Adding another scale for 'colour',
#> which will replace the existing scale.
dotPlot(combined, features = c('Pdgfra','Lox','Ptch1','Gli1'), split.by = "sample.name", idents = c("Hh-inactive Fib", "Hh-active Fib"))
combined$clusters.final <- Idents(combined)
gg1 <- computeProportion(combined, x = "clusters.final", fill = "sample.name")
gg2 <- computeProportion(combined, x = "sample.name", fill = "clusters.final", colors.use = scPalette(6))
gg1 + gg2
markers.sample <- identifyDEG(combined, group.by = "sample.name")
#> Identify DEG using FindAllMarkers
#> Calculating cluster Control
#> Calculating cluster Hh activation
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
#> Saving 3 x 4.5 in image
res.go <- getEnrichedGO(markers, category = "BP", do.simplify = F, idents = c("Hh-inactive Fib", "Hh-active Fib"))
#> Scale for 'colour' is already present. Adding another scale for 'colour',
#> which will replace the existing scale.
#> Saving 6 x 4.5 in image
combined <- ScaleData(combined)
combined$clusters.final <- Idents(combined)
save(combined, file = "scMC_dermis_SeuratV4.RData")
# combined.loom <- as.loom(combined, filename = "scMC_dermis_SeuratV4.loom", verbose = FALSE)
# combined.loom$close_all()
write.table(GetAssayData(combined),file = "preprocessedData_integration_dermis.txt",sep = '\t')
write.table(combined@meta.data,file = "metaData_integration_dermis.txt",sep = '\t')
write.table(Idents(combined),file = "identity_integration_dermis.txt",sep = '\t')
write.table(Embeddings(combined, "scMC"),file = "integratedSpace_scMC_integration.txt",sep = '\t')
write.table(combined[["umap"]]@cell.embeddings,file = "projectedData_umap_integration_dermis.txt",sep = '\t')
write.table(markers,file = "markers_integration_dermis.txt",sep = '\t')
write.table(top10,file = "markersTop10_integration_dermis.txt",sep = '\t')
# combined.loom <- loomR::connect(filename = "scMC_dermis_SeuratV4.loom", mode = "r")
# combined <- as.Seurat(combined.loom)